
# A5c_price_index_gini
#==============================================================================

# Description: This code computes the Gini coefficients of real income on aggregate
# in the entry-only and full model


rm(list = ls())
options(warn=-1)

library('data.table')
library('stargazer')
library('ggplot2')
library('reldist')
library('matlab')


# Import and format data
#===============================================================================

load('data_PI_entry_gini.RData')
data_entry <- data

load('data_PI_full_adj_gini.RData')
data_full <- data

data_entry[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]
data_entry[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data_entry[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data_entry[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data_entry[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

data_full[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]
data_full[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data_full[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data_full[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data_full[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

data_entry[, index_u_beta := (abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]
data_full[, index_u_beta := (abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]

# Format Income Draws
#-------------------------------------------------------------------------------

Y = data_entry[ , 17:116]
Y_mat <- as.matrix(Y)
Y <- Y[, 21:80]
Y_PI <- data_entry[ , c(1:3, 17:116)]
Y_PI <- Y_PI[ , .SD[1], by = c('Year', 'Quarter', 'Declarant')]
setkey(Y_PI, Year, Quarter, Declarant)
Y_PI <- Y_PI[ , 4:103, with = F]
Y_PI <- Y_PI[ , 20:80, with = F]

# Format Price Indexes
#-------------------------------------------------------------------------------

P_true_entry = data_entry[ , 117:216]
P_true_mat_entry <- as.matrix(P_true_entry)
P_true_entry <- P_true_entry[, 21:80]
P_counter_entry = data_entry[ , 217:316]
P_counter_mat_entry <- as.matrix(P_counter_entry)
P_counter_entry <- P_counter_entry[, 21:80]

P_true_full = data_full[ , 127:226]
P_true_mat_full <- as.matrix(P_true_full)
P_true_full <- P_true_full[, 21:80]
P_counter_full = data_full[ , 227:326]
P_counter_mat_full <- as.matrix(P_counter_full)
P_counter_full <- P_counter_full[, 21:80]

# Format Price Coefficients
#-------------------------------------------------------------------------------

alpha_price <- data_entry[ , .(alpha_price)]
alpha_1 <- data_entry[ , .(alpha_1)]

alpha_price_mat <- as.matrix(alpha_price)
alpha_price_mat <- repmat(alpha_price_mat, 1, 100)
alpha_1_mat <- as.matrix(alpha_1)
alpha_1_mat <- repmat(alpha_1_mat, 1, 100)
beta_price <- alpha_price_mat + alpha_1_mat * Y_mat


# Compute Price Indexs
#============================================================================================================

data_PI <- data_entry
data_PI_entry <- data_entry
v_true_entry <- beta_price * log(P_true_mat_entry / (exp(1) * gamma(1 - (1/beta_price))))
v_counter_entry <- beta_price * log(P_counter_mat_entry / (exp(1) * gamma(1 - (1/beta_price))))

data_PI_full <- data_full
v_true_full <- beta_price * log(P_true_mat_full / (exp(1) * gamma(1 - (1/beta_price))))
v_counter_full <- beta_price * log(P_counter_mat_full / (exp(1) * gamma(1 - (1/beta_price))))

PI_gini_summary <- data_PI[ , 1:4]
PI_gini_summary <- PI_gini_summary[ , .SD[1], by = c('Year', 'Quarter', 'Declarant')]

w = fread("weights_i_k_gini.csv")
colnames(w)[1:4] = c("product", "Year", "Quarter", "Declarant")


for (i in 20:80) {
  
print(i)    
  
w_sub = w[, c(1:4, 4 + i), with = F]
colnames(w_sub)[5] = "s_k"  
data_PI_gini = merge(data_PI, w_sub, by = c('product', 'Year', 'Quarter', 'Declarant'))

data_PI_gini <- cbind(data_PI_gini[ , 1:4], data_PI$index_mc, data_PI_full$index_mc, data_PI$diff, data_PI_full$diff, data_PI$index_u_beta, data_PI_full$index_u_beta, data_PI_gini$s_k)
colnames(data_PI_gini)[5:11] <- c("index_mc_entry", "index_mc_full", "diff_entry", "diff_full", "index_u_beta_entry", "index_u_beta_full", "s_k")
data_PI_gini <- data_PI_gini[ , beta_price_temp := beta_price[ , i]]
data_PI_gini <- data_PI_gini[ , P_true_temp_entry := P_true_mat_entry[ , i]]
data_PI_gini <- data_PI_gini[ , P_counter_temp_entry := P_counter_mat_entry[ , i]]
data_PI_gini <- data_PI_gini[ , P_true_temp_full := P_true_mat_full[ , i]]
data_PI_gini <- data_PI_gini[ , P_counter_temp_full := P_counter_mat_full[ , i]]
data_PI_gini[ , v_true_temp_entry := beta_price[ , i] * log(P_true_mat_entry[ , i] / (exp(1) * gamma(1 - (1/beta_price[ , i]))))]
data_PI_gini[ , v_counter_temp_entry := beta_price[ , i] * log(P_counter_mat_entry[ , i] / (exp(1) * gamma(1 - (1/beta_price[ , i]))))]
data_PI_gini[ , v_true_temp_full := beta_price[ , i] * log(P_true_mat_full[ , i] / (exp(1) * gamma(1 - (1/beta_price[ , i]))))]
data_PI_gini[ , v_counter_temp_full := beta_price[ , i] * log(P_counter_mat_full[ , i] / (exp(1) * gamma(1 - (1/beta_price[ , i]))))]

# Remove Extreme Observations
#-------------------------------------------------------------------------------

data_PI_gini <- data_PI_gini[is.na(v_true_temp_entry) == F & v_true_temp_entry != Inf]
data_PI_gini <- data_PI_gini[is.na(v_counter_temp_entry) == F & v_counter_temp_entry != Inf]
data_PI_gini <- data_PI_gini[is.na(v_true_temp_full) == F & v_true_temp_full != Inf]
data_PI_gini <- data_PI_gini[is.na(v_counter_temp_full) == F & v_counter_temp_full != Inf]
data_PI_gini <- data_PI_gini[beta_price_temp < 0]
data_PI_gini <- data_PI_gini[beta_price_temp > -1000]
data_PI_gini <- data_PI_gini[P_counter_temp_entry < 1e+5 & P_counter_temp_entry > 1e-5 & P_true_temp_entry < 1e+5 & P_true_temp_entry > 1e-5]
data_PI_gini <- data_PI_gini[P_counter_temp_full < 1e+5 & P_counter_temp_full > 1e-5 & P_true_temp_full < 1e+5 & P_true_temp_full > 1e-5]
data_PI_gini <- data_PI_gini[index_mc_entry == 0]
data_PI_gini <- data_PI_gini[index_mc_full == 0]
data_PI_gini <- data_PI_gini[abs(diff_entry) < 1]
data_PI_gini <- data_PI_gini[abs(diff_full) < 1]
data_PI_gini <- data_PI_gini[index_u_beta_entry == T]
data_PI_gini <- data_PI_gini[index_u_beta_full == T]

data_PI_gini <- data_PI_gini[, .SD[((s_k < quantile(s_k, probs = 0.97)))], by = 'Declarant']

data_PI_gini <- data_PI_gini[, .SD[((v_counter_temp_entry < quantile(v_counter_temp_entry, probs = 0.95)) & (v_counter_temp_entry > quantile(v_counter_temp_entry, probs = 0.05)))], by = 'Declarant']
data_PI_gini <- data_PI_gini[, .SD[((v_counter_temp_full < quantile(v_counter_temp_full, probs = 0.95)) & (v_counter_temp_full > quantile(v_counter_temp_full, probs = 0.05)))], by = 'Declarant']
data_PI_gini <- data_PI_gini[, .SD[((v_true_temp_entry < quantile(v_true_temp_entry, probs = 0.95)) & (v_true_temp_entry > quantile(v_true_temp_entry, probs = 0.05)))], by = 'Declarant']
data_PI_gini <- data_PI_gini[, .SD[((v_true_temp_full < quantile(v_true_temp_full, probs = 0.95)) & (v_true_temp_full > quantile(v_true_temp_full, probs = 0.05)))], by = 'Declarant']

data_PI_gini = data_PI_gini[s_k != 0]
data_PI_gini[ , sum_w := sum(s_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini[ , omega_equ := s_k / sum_w]

# Uncomment if switch to equal weights
#data_PI_gini <- data_PI_gini[ , obs := .N, by = c('Year', 'Quarter', 'Declarant')]
#data_PI_gini <- data_PI_gini[ , omega_equ := 1/obs]

# Construct Price Index
#-------------------------------------------------------------------------------

data_PI_gini <- data_PI_gini[ , PI_1 := exp(1 - sum(omega_equ*log(omega_equ))), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , PI_2 := prod(exp((omega_equ/beta_price_temp)*v_true_temp_entry)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , PI_3 := prod(gamma(1 - omega_equ/beta_price_temp)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , paste("PI_true_entry_", i, sep = "") := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI_gini <- data_PI_gini[ , PI_2 := prod(exp((omega_equ/beta_price_temp)*v_counter_temp_entry)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , PI_3 := prod(gamma(1 - omega_equ/beta_price_temp)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , paste("PI_counter_entry_", i, sep = "") := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI_gini <- data_PI_gini[ , PI_1 := exp(1 - sum(omega_equ*log(omega_equ))), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , PI_2 := prod(exp((omega_equ/beta_price_temp)*v_true_temp_full)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , PI_3 := prod(gamma(1 - omega_equ/beta_price_temp)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , paste("PI_true_full_", i, sep = "") := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI_gini <- data_PI_gini[ , PI_2 := prod(exp((omega_equ/beta_price_temp)*v_counter_temp_full)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , PI_3 := prod(gamma(1 - omega_equ/beta_price_temp)), by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , paste("PI_counter_full_", i, sep = "") := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI_gini <- data_PI_gini[ , .SD[1], by = c('Year', 'Quarter', 'Declarant')]
data_PI_gini <- data_PI_gini[ , c(1:3, 26:29)]
PI_gini_summary <- merge(PI_gini_summary, data_PI_gini, by = c("Year", "Quarter", "Declarant"), all = T)

}



# Regressions - Entry
#===============================================================================

# Select appropriate columns
#-------------------------------------------------------------------------------

cols <- grep("PI_true_entry", colnames(PI_gini_summary))
PI_true_entry <- PI_gini_summary[, cols, with = F]
cols <- grep("PI_counter_entry", colnames(PI_gini_summary))
PI_counter_entry <- PI_gini_summary[, cols, with = F]

Y_PI <- exp(Y_PI)
Y_PI <- Y_PI * 10000

# Compute real income and Gini
#-------------------------------------------------------------------------------

W_true_entry = Y_PI/PI_true_entry
W_counter_entry = Y_PI/PI_counter_entry
gini_W_true_PI_entry <- as.matrix(apply(W_true_entry[,c(1:61)], 1, gini))
gini_W_counter_PI_entry <- as.matrix(apply(W_counter_entry[,c(1:61)], 1, gini))
PI_gini_W_entry <- PI_gini_summary[, 1:3]
PI_gini_W_entry <- cbind(PI_gini_W_entry, gini_W_true_PI_entry, gini_W_counter_PI_entry)
names(PI_gini_W_entry)[4:5] <- c("gini_W_true", "gini_W_counter")
PI_gini_W_entry[ , d_gini_W := gini_W_counter / gini_W_true - 1]

reg_true_PI <- lm(gini_W_true ~ 0 + as.factor(Declarant), data = PI_gini_W_entry)
print(summary(reg_true_PI))

reg_counter_PI <- lm(gini_W_counter ~ 0 + as.factor(Declarant), data = PI_gini_W_entry)
print(summary(reg_counter_PI))

reg_d_gini_PI <- lm(d_gini_W ~ 0 + as.factor(Declarant), data = PI_gini_W_entry)
print(summary(reg_d_gini_PI))

# Format and save
#-------------------------------------------------------------------------------

declarant <- as.matrix(reg_d_gini_PI$coefficients)
declarant <- gsub("as.factor\\(Declarant\\)","",rownames(declarant))
declarant <- as.numeric(declarant)
income_data <- fread(file = "gdp_per_capita_2007.csv")
output <- merge(income_data, data.table(declarant, reg_true_PI$coefficients, reg_counter_PI$coefficients, reg_d_gini_PI$coefficients), by = "declarant")
colnames(output) <- c(colnames(output)[1:4], "gini_true", "gini_counter", "d_gini")
write.csv(file = "price_index/results/d_gini_entry.csv", output)


# Regressions - Full Adjustments
#============================================================================================================

# Select appropriate columns
#-------------------------------------------------------------------------------

cols <- grep("PI_true_full", colnames(PI_gini_summary))
PI_true_full <- PI_gini_summary[, cols, with = F]
cols <- grep("PI_counter_full", colnames(PI_gini_summary))
PI_counter_full <- PI_gini_summary[, cols, with = F]

# Compute real income and Gini
#-------------------------------------------------------------------------------

W_true_full = Y_PI/PI_true_full
W_counter_full = Y_PI/PI_counter_full
gini_W_true_PI_full <- as.matrix(apply(W_true_full[,c(1:61)], 1, gini))
gini_W_counter_PI_full <- as.matrix(apply(W_counter_full[,c(1:61)], 1, gini))
PI_gini_W_full <- PI_gini_summary[, 1:3]
PI_gini_W_full <- cbind(PI_gini_W_full, gini_W_true_PI_full, gini_W_counter_PI_full)
names(PI_gini_W_full)[4:5] <- c("gini_W_true", "gini_W_counter")
PI_gini_W_full[ , d_gini_W := gini_W_counter / gini_W_true - 1]

reg_true_PI <- lm(gini_W_true ~ 0 + as.factor(Declarant), data = PI_gini_W_full)
print(summary(reg_true_PI))

reg_counter_PI <- lm(gini_W_counter ~ 0 + as.factor(Declarant), data = PI_gini_W_full)
print(summary(reg_counter_PI))

reg_d_gini_PI <- lm(d_gini_W ~ 0 + as.factor(Declarant), data = PI_gini_W_full)
print(summary(reg_d_gini_PI))

# Format and save
#-------------------------------------------------------------------------------

declarant <- as.matrix(reg_d_gini_PI$coefficients)
declarant <- gsub("as.factor\\(Declarant\\)","",rownames(declarant))
declarant <- as.numeric(declarant)
income_data <- fread(file = "gdp_per_capita_2007.csv")
output <- merge(income_data, data.table(declarant, reg_true_PI$coefficients, reg_counter_PI$coefficients, reg_d_gini_PI$coefficients), by = "declarant")
colnames(output) <- c(colnames(output)[1:4], "gini_true", "gini_counter", "d_gini")
write.csv(file = "price_index/results/d_gini_full.csv", output)





